1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #include <dirent.h> 8 #include <stdarg.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <unistd.h> 12 #include <ctype.h> 13 #include <errno.h> 14 #include <fcntl.h> 15 #include <sched.h> 16 #include <stdio.h> 17 18 #include "utils.h" 19 20 #define MAX_MSG_LENGTH 1024 21 int config_debug; 22 23 /* 24 * err_msg - print an error message to the stderr 25 */ 26 void err_msg(const char *fmt, ...) 27 { 28 char message[MAX_MSG_LENGTH]; 29 va_list ap; 30 31 va_start(ap, fmt); 32 vsnprintf(message, sizeof(message), fmt, ap); 33 va_end(ap); 34 35 fprintf(stderr, "%s", message); 36 } 37 38 /* 39 * debug_msg - print a debug message to stderr if debug is set 40 */ 41 void debug_msg(const char *fmt, ...) 42 { 43 char message[MAX_MSG_LENGTH]; 44 va_list ap; 45 46 if (!config_debug) 47 return; 48 49 va_start(ap, fmt); 50 vsnprintf(message, sizeof(message), fmt, ap); 51 va_end(ap); 52 53 fprintf(stderr, "%s", message); 54 } 55 56 /* 57 * get_llong_from_str - get a long long int from a string 58 */ 59 long long get_llong_from_str(char *start) 60 { 61 long long value; 62 char *end; 63 64 errno = 0; 65 value = strtoll(start, &end, 10); 66 if (errno || start == end) 67 return -1; 68 69 return value; 70 } 71 72 /* 73 * get_duration - fill output with a human readable duration since start_time 74 */ 75 void get_duration(time_t start_time, char *output, int output_size) 76 { 77 time_t now = time(NULL); 78 struct tm *tm_info; 79 time_t duration; 80 81 duration = difftime(now, start_time); 82 tm_info = gmtime(&duration); 83 84 snprintf(output, output_size, "%3d %02d:%02d:%02d", 85 tm_info->tm_yday, 86 tm_info->tm_hour, 87 tm_info->tm_min, 88 tm_info->tm_sec); 89 } 90 91 /* 92 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument 93 * 94 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set 95 * filling cpu_set_t argument. 96 * 97 * Returns 1 on success, 0 otherwise. 98 */ 99 int parse_cpu_set(char *cpu_list, cpu_set_t *set) 100 { 101 const char *p; 102 int end_cpu; 103 int nr_cpus; 104 int cpu; 105 int i; 106 107 CPU_ZERO(set); 108 109 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 110 111 for (p = cpu_list; *p; ) { 112 cpu = atoi(p); 113 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) 114 goto err; 115 116 while (isdigit(*p)) 117 p++; 118 if (*p == '-') { 119 p++; 120 end_cpu = atoi(p); 121 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) 122 goto err; 123 while (isdigit(*p)) 124 p++; 125 } else 126 end_cpu = cpu; 127 128 if (cpu == end_cpu) { 129 debug_msg("cpu_set: adding cpu %d\n", cpu); 130 CPU_SET(cpu, set); 131 } else { 132 for (i = cpu; i <= end_cpu; i++) { 133 debug_msg("cpu_set: adding cpu %d\n", i); 134 CPU_SET(i, set); 135 } 136 } 137 138 if (*p == ',') 139 p++; 140 } 141 142 return 0; 143 err: 144 debug_msg("Error parsing the cpu set %s\n", cpu_list); 145 return 1; 146 } 147 148 /* 149 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds 150 */ 151 long parse_seconds_duration(char *val) 152 { 153 char *end; 154 long t; 155 156 t = strtol(val, &end, 10); 157 158 if (end) { 159 switch (*end) { 160 case 's': 161 case 'S': 162 break; 163 case 'm': 164 case 'M': 165 t *= 60; 166 break; 167 case 'h': 168 case 'H': 169 t *= 60 * 60; 170 break; 171 172 case 'd': 173 case 'D': 174 t *= 24 * 60 * 60; 175 break; 176 } 177 } 178 179 return t; 180 } 181 182 /* 183 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds 184 */ 185 long parse_ns_duration(char *val) 186 { 187 char *end; 188 long t; 189 190 t = strtol(val, &end, 10); 191 192 if (end) { 193 if (!strncmp(end, "ns", 2)) { 194 return t; 195 } else if (!strncmp(end, "us", 2)) { 196 t *= 1000; 197 return t; 198 } else if (!strncmp(end, "ms", 2)) { 199 t *= 1000 * 1000; 200 return t; 201 } else if (!strncmp(end, "s", 1)) { 202 t *= 1000 * 1000 * 1000; 203 return t; 204 } 205 return -1; 206 } 207 208 return t; 209 } 210 211 /* 212 * This is a set of helper functions to use SCHED_DEADLINE. 213 */ 214 #ifndef __NR_sched_setattr 215 # ifdef __x86_64__ 216 # define __NR_sched_setattr 314 217 # elif __i386__ 218 # define __NR_sched_setattr 351 219 # elif __arm__ 220 # define __NR_sched_setattr 380 221 # elif __aarch64__ || __riscv 222 # define __NR_sched_setattr 274 223 # elif __powerpc__ 224 # define __NR_sched_setattr 355 225 # elif __s390x__ 226 # define __NR_sched_setattr 345 227 # endif 228 #endif 229 230 #define SCHED_DEADLINE 6 231 232 static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, 233 unsigned int flags) { 234 return syscall(__NR_sched_setattr, pid, attr, flags); 235 } 236 237 int __set_sched_attr(int pid, struct sched_attr *attr) 238 { 239 int flags = 0; 240 int retval; 241 242 retval = sched_setattr(pid, attr, flags); 243 if (retval < 0) { 244 err_msg("Failed to set sched attributes to the pid %d: %s\n", 245 pid, strerror(errno)); 246 return 1; 247 } 248 249 return 0; 250 } 251 252 /* 253 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm 254 * 255 * Check if the procfs entry is a directory of a process, and then check if the 256 * process has a comm with the prefix set in char *comm_prefix. As the 257 * current users of this function only check for kernel threads, there is no 258 * need to check for the threads for the process. 259 * 260 * Return: True if the proc_entry contains a comm file with comm_prefix*. 261 * Otherwise returns false. 262 */ 263 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) 264 { 265 char buffer[MAX_PATH]; 266 int comm_fd, retval; 267 char *t_name; 268 269 if (proc_entry->d_type != DT_DIR) 270 return 0; 271 272 if (*proc_entry->d_name == '.') 273 return 0; 274 275 /* check if the string is a pid */ 276 for (t_name = proc_entry->d_name; t_name; t_name++) { 277 if (!isdigit(*t_name)) 278 break; 279 } 280 281 if (*t_name != '\0') 282 return 0; 283 284 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); 285 comm_fd = open(buffer, O_RDONLY); 286 if (comm_fd < 0) 287 return 0; 288 289 memset(buffer, 0, MAX_PATH); 290 retval = read(comm_fd, buffer, MAX_PATH); 291 292 close(comm_fd); 293 294 if (retval <= 0) 295 return 0; 296 297 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); 298 if (retval) 299 return 0; 300 301 /* comm already have \n */ 302 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); 303 304 return 1; 305 } 306 307 /* 308 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix 309 * 310 * This function uses procfs to list the currently running threads and then set the 311 * sched_attr *attr to the threads that start with char *comm_prefix. It is 312 * mainly used to set the priority to the kernel threads created by the 313 * tracers. 314 */ 315 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) 316 { 317 struct dirent *proc_entry; 318 DIR *procfs; 319 int retval; 320 321 if (strlen(comm_prefix) >= MAX_PATH) { 322 err_msg("Command prefix is too long: %d < strlen(%s)\n", 323 MAX_PATH, comm_prefix); 324 return 1; 325 } 326 327 procfs = opendir("/proc"); 328 if (!procfs) { 329 err_msg("Could not open procfs\n"); 330 return 1; 331 } 332 333 while ((proc_entry = readdir(procfs))) { 334 335 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 336 if (!retval) 337 continue; 338 339 /* procfs_is_workload_pid confirmed it is a pid */ 340 retval = __set_sched_attr(atoi(proc_entry->d_name), attr); 341 if (retval) { 342 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); 343 goto out_err; 344 } 345 346 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); 347 } 348 return 0; 349 350 out_err: 351 closedir(procfs); 352 return 1; 353 } 354 355 #define INVALID_VAL (~0L) 356 static long get_long_ns_after_colon(char *start) 357 { 358 long val = INVALID_VAL; 359 360 /* find the ":" */ 361 start = strstr(start, ":"); 362 if (!start) 363 return -1; 364 365 /* skip ":" */ 366 start++; 367 val = parse_ns_duration(start); 368 369 return val; 370 } 371 372 static long get_long_after_colon(char *start) 373 { 374 long val = INVALID_VAL; 375 376 /* find the ":" */ 377 start = strstr(start, ":"); 378 if (!start) 379 return -1; 380 381 /* skip ":" */ 382 start++; 383 val = get_llong_from_str(start); 384 385 return val; 386 } 387 388 /* 389 * parse priority in the format: 390 * SCHED_OTHER: 391 * o:<prio> 392 * O:<prio> 393 * SCHED_RR: 394 * r:<prio> 395 * R:<prio> 396 * SCHED_FIFO: 397 * f:<prio> 398 * F:<prio> 399 * SCHED_DEADLINE: 400 * d:runtime:period 401 * D:runtime:period 402 */ 403 int parse_prio(char *arg, struct sched_attr *sched_param) 404 { 405 long prio; 406 long runtime; 407 long period; 408 409 memset(sched_param, 0, sizeof(*sched_param)); 410 sched_param->size = sizeof(*sched_param); 411 412 switch (arg[0]) { 413 case 'd': 414 case 'D': 415 /* d:runtime:period */ 416 if (strlen(arg) < 4) 417 return -1; 418 419 runtime = get_long_ns_after_colon(arg); 420 if (runtime == INVALID_VAL) 421 return -1; 422 423 period = get_long_ns_after_colon(&arg[2]); 424 if (period == INVALID_VAL) 425 return -1; 426 427 if (runtime > period) 428 return -1; 429 430 sched_param->sched_policy = SCHED_DEADLINE; 431 sched_param->sched_runtime = runtime; 432 sched_param->sched_deadline = period; 433 sched_param->sched_period = period; 434 break; 435 case 'f': 436 case 'F': 437 /* f:prio */ 438 prio = get_long_after_colon(arg); 439 if (prio == INVALID_VAL) 440 return -1; 441 442 if (prio < sched_get_priority_min(SCHED_FIFO)) 443 return -1; 444 if (prio > sched_get_priority_max(SCHED_FIFO)) 445 return -1; 446 447 sched_param->sched_policy = SCHED_FIFO; 448 sched_param->sched_priority = prio; 449 break; 450 case 'r': 451 case 'R': 452 /* r:prio */ 453 prio = get_long_after_colon(arg); 454 if (prio == INVALID_VAL) 455 return -1; 456 457 if (prio < sched_get_priority_min(SCHED_RR)) 458 return -1; 459 if (prio > sched_get_priority_max(SCHED_RR)) 460 return -1; 461 462 sched_param->sched_policy = SCHED_RR; 463 sched_param->sched_priority = prio; 464 break; 465 case 'o': 466 case 'O': 467 /* o:prio */ 468 prio = get_long_after_colon(arg); 469 if (prio == INVALID_VAL) 470 return -1; 471 472 if (prio < MIN_NICE) 473 return -1; 474 if (prio > MAX_NICE) 475 return -1; 476 477 sched_param->sched_policy = SCHED_OTHER; 478 sched_param->sched_nice = prio; 479 break; 480 default: 481 return -1; 482 } 483 return 0; 484 } 485 486 /* 487 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy 488 * 489 * This is used to reduce the exit from idle latency. The value 490 * will be reset once the file descriptor of /dev/cpu_dma_latecy 491 * is closed. 492 * 493 * Return: the /dev/cpu_dma_latecy file descriptor 494 */ 495 int set_cpu_dma_latency(int32_t latency) 496 { 497 int retval; 498 int fd; 499 500 fd = open("/dev/cpu_dma_latency", O_RDWR); 501 if (fd < 0) { 502 err_msg("Error opening /dev/cpu_dma_latency\n"); 503 return -1; 504 } 505 506 retval = write(fd, &latency, 4); 507 if (retval < 1) { 508 err_msg("Error setting /dev/cpu_dma_latency\n"); 509 close(fd); 510 return -1; 511 } 512 513 debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); 514 515 return fd; 516 } 517 518 #define _STR(x) #x 519 #define STR(x) _STR(x) 520 521 /* 522 * find_mount - find a the mount point of a given fs 523 * 524 * Returns 0 if mount is not found, otherwise return 1 and fill mp 525 * with the mount point. 526 */ 527 static const int find_mount(const char *fs, char *mp, int sizeof_mp) 528 { 529 char mount_point[MAX_PATH+1]; 530 char type[100]; 531 int found = 0; 532 FILE *fp; 533 534 fp = fopen("/proc/mounts", "r"); 535 if (!fp) 536 return 0; 537 538 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { 539 if (strcmp(type, fs) == 0) { 540 found = 1; 541 break; 542 } 543 } 544 fclose(fp); 545 546 if (!found) 547 return 0; 548 549 memset(mp, 0, sizeof_mp); 550 strncpy(mp, mount_point, sizeof_mp - 1); 551 552 debug_msg("Fs %s found at %s\n", fs, mp); 553 return 1; 554 } 555 556 /* 557 * get_self_cgroup - get the current thread cgroup path 558 * 559 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: 560 * 561 * 0::/user.slice/user-0.slice/session-3.scope'\n' 562 * 563 * This function is interested in the content after the second : and before the '\n'. 564 * 565 * Returns 1 if a string was found, 0 otherwise. 566 */ 567 static int get_self_cgroup(char *self_cg, int sizeof_self_cg) 568 { 569 char path[MAX_PATH], *start; 570 int fd, retval; 571 572 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); 573 574 fd = open(path, O_RDONLY); 575 if (fd < 0) 576 return 0; 577 578 retval = read(fd, path, MAX_PATH); 579 580 close(fd); 581 582 if (retval <= 0) 583 return 0; 584 585 start = path; 586 587 start = strstr(start, ":"); 588 if (!start) 589 return 0; 590 591 /* skip ":" */ 592 start++; 593 594 start = strstr(start, ":"); 595 if (!start) 596 return 0; 597 598 /* skip ":" */ 599 start++; 600 601 if (strlen(start) >= sizeof_self_cg) 602 return 0; 603 604 snprintf(self_cg, sizeof_self_cg, "%s", start); 605 606 /* Swap '\n' with '\0' */ 607 start = strstr(self_cg, "\n"); 608 609 /* there must be '\n' */ 610 if (!start) 611 return 0; 612 613 /* ok, it found a string after the second : and before the \n */ 614 *start = '\0'; 615 616 return 1; 617 } 618 619 /* 620 * set_comm_cgroup - Set cgroup to pid_t pid 621 * 622 * If cgroup argument is not NULL, the threads will move to the given cgroup. 623 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 624 * 625 * Supports cgroup v2. 626 * 627 * Returns 1 on success, 0 otherwise. 628 */ 629 int set_pid_cgroup(pid_t pid, const char *cgroup) 630 { 631 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 632 char cgroup_procs[MAX_PATH]; 633 char pid_str[24]; 634 int retval; 635 int cg_fd; 636 637 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 638 if (!retval) { 639 err_msg("Did not find cgroupv2 mount point\n"); 640 return 0; 641 } 642 643 if (!cgroup) { 644 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 645 sizeof(cgroup_path) - strlen(cgroup_path)); 646 if (!retval) { 647 err_msg("Did not find self cgroup\n"); 648 return 0; 649 } 650 } else { 651 snprintf(&cgroup_path[strlen(cgroup_path)], 652 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 653 } 654 655 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 656 657 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 658 659 cg_fd = open(cgroup_procs, O_RDWR); 660 if (cg_fd < 0) 661 return 0; 662 663 snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 664 665 retval = write(cg_fd, pid_str, strlen(pid_str)); 666 if (retval < 0) 667 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 668 pid_str, strerror(errno)); 669 else 670 debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 671 672 close(cg_fd); 673 674 return (retval >= 0); 675 } 676 677 /** 678 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 679 * 680 * If cgroup argument is not NULL, the threads will move to the given cgroup. 681 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 682 * 683 * Supports cgroup v2. 684 * 685 * Returns 1 on success, 0 otherwise. 686 */ 687 int set_comm_cgroup(const char *comm_prefix, const char *cgroup) 688 { 689 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 690 char cgroup_procs[MAX_PATH]; 691 struct dirent *proc_entry; 692 DIR *procfs; 693 int retval; 694 int cg_fd; 695 696 if (strlen(comm_prefix) >= MAX_PATH) { 697 err_msg("Command prefix is too long: %d < strlen(%s)\n", 698 MAX_PATH, comm_prefix); 699 return 0; 700 } 701 702 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 703 if (!retval) { 704 err_msg("Did not find cgroupv2 mount point\n"); 705 return 0; 706 } 707 708 if (!cgroup) { 709 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 710 sizeof(cgroup_path) - strlen(cgroup_path)); 711 if (!retval) { 712 err_msg("Did not find self cgroup\n"); 713 return 0; 714 } 715 } else { 716 snprintf(&cgroup_path[strlen(cgroup_path)], 717 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 718 } 719 720 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 721 722 debug_msg("Using cgroup path at: %s\n", cgroup_procs); 723 724 cg_fd = open(cgroup_procs, O_RDWR); 725 if (cg_fd < 0) 726 return 0; 727 728 procfs = opendir("/proc"); 729 if (!procfs) { 730 err_msg("Could not open procfs\n"); 731 goto out_cg; 732 } 733 734 while ((proc_entry = readdir(procfs))) { 735 736 retval = procfs_is_workload_pid(comm_prefix, proc_entry); 737 if (!retval) 738 continue; 739 740 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); 741 if (retval < 0) { 742 err_msg("Error setting cgroup attributes for pid:%s - %s\n", 743 proc_entry->d_name, strerror(errno)); 744 goto out_procfs; 745 } 746 747 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); 748 } 749 750 closedir(procfs); 751 close(cg_fd); 752 return 1; 753 754 out_procfs: 755 closedir(procfs); 756 out_cg: 757 close(cg_fd); 758 return 0; 759 } 760 761 /** 762 * auto_house_keeping - Automatically move rtla out of measurement threads 763 * 764 * Try to move rtla away from the tracer, if possible. 765 * 766 * Returns 1 on success, 0 otherwise. 767 */ 768 int auto_house_keeping(cpu_set_t *monitored_cpus) 769 { 770 cpu_set_t rtla_cpus, house_keeping_cpus; 771 int retval; 772 773 /* first get the CPUs in which rtla can actually run. */ 774 retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); 775 if (retval == -1) { 776 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); 777 return 0; 778 } 779 780 /* then check if the existing setup is already good. */ 781 CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 782 if (!CPU_COUNT(&house_keeping_cpus)) { 783 debug_msg("rtla and the monitored CPUs do not share CPUs."); 784 debug_msg("Skipping auto house-keeping\n"); 785 return 1; 786 } 787 788 /* remove the intersection */ 789 CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); 790 791 /* get only those that rtla can run */ 792 CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); 793 794 /* is there any cpu left? */ 795 if (!CPU_COUNT(&house_keeping_cpus)) { 796 debug_msg("Could not find any CPU for auto house-keeping\n"); 797 return 0; 798 } 799 800 retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); 801 if (retval == -1) { 802 debug_msg("Could not set affinity for auto house-keeping\n"); 803 return 0; 804 } 805 806 debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); 807 808 return 1; 809 } 810