1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #define _GNU_SOURCE
7 #include <dirent.h>
8 #include <stdarg.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <ctype.h>
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <sched.h>
16 #include <stdio.h>
17
18 #include "utils.h"
19
20 #define MAX_MSG_LENGTH 1024
21 int config_debug;
22
23 /*
24 * err_msg - print an error message to the stderr
25 */
err_msg(const char * fmt,...)26 void err_msg(const char *fmt, ...)
27 {
28 char message[MAX_MSG_LENGTH];
29 va_list ap;
30
31 va_start(ap, fmt);
32 vsnprintf(message, sizeof(message), fmt, ap);
33 va_end(ap);
34
35 fprintf(stderr, "%s", message);
36 }
37
38 /*
39 * debug_msg - print a debug message to stderr if debug is set
40 */
debug_msg(const char * fmt,...)41 void debug_msg(const char *fmt, ...)
42 {
43 char message[MAX_MSG_LENGTH];
44 va_list ap;
45
46 if (!config_debug)
47 return;
48
49 va_start(ap, fmt);
50 vsnprintf(message, sizeof(message), fmt, ap);
51 va_end(ap);
52
53 fprintf(stderr, "%s", message);
54 }
55
/*
 * get_llong_from_str - convert the leading base-10 number of a string
 *
 * Trailing characters after the number are ignored.
 *
 * Returns the parsed value, or -1 when no digits were consumed or strtoll()
 * reported an error through errno. Note that a literal "-1" is therefore
 * indistinguishable from a failure.
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	return (errno != 0 || stop == start) ? -1 : parsed;
}
71
/*
 * get_duration - render the time elapsed since start_time as text
 *
 * Writes "<days> hh:mm:ss" into output (at most output_size bytes,
 * including the terminator). The elapsed seconds are broken down with
 * gmtime(), and the day count is taken from tm_yday.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *parts = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 parts->tm_yday, parts->tm_hour, parts->tm_min, parts->tm_sec);
}
90
/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the
 * corresponding bits in the cpu_set_t argument. The set is cleared first.
 *
 * Every item must start with a digit: whitespace, signs or any other
 * stray character make the whole list invalid. CPU numbers must be smaller
 * than the number of configured processors, and a range must not be
 * descending.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p;
	char *end;
	long end_cpu;
	long nr_cpus;
	long cpu;
	long i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (p = cpu_list; *p; ) {
		/*
		 * Refuse anything that is not a digit here. Number parsers
		 * silently skip leading blanks and signs, which would leave
		 * p stuck on the same character forever.
		 */
		if (!isdigit((unsigned char)*p))
			goto err;

		/* on overflow strtol() returns LONG_MAX, caught by the range check */
		cpu = strtol(p, &end, 10);
		if (cpu >= nr_cpus)
			goto err;
		p = end;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			if (!isdigit((unsigned char)*p))
				goto err;

			end_cpu = strtol(p, &end, 10);
			if (end_cpu < cpu || end_cpu >= nr_cpus)
				goto err;
			p = end;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", (int)i);
			CPU_SET(i, set);
		}

		/* any other separator is rejected by the digit check above */
		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
147
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading base-10 number is scaled according to the first character
 * that follows it (case-insensitive): 'm' minutes, 'h' hours, 'd' days.
 * An 's' suffix, no suffix, or any other character leaves the value as is.
 *
 * Returns the duration in seconds.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long secs = strtol(val, &suffix, 10);
	long scale;

	switch (*suffix) {
	case 'm':
	case 'M':
		scale = 60;
		break;
	case 'h':
	case 'H':
		scale = 60 * 60;
		break;
	case 'd':
	case 'D':
		scale = 24 * 60 * 60;
		break;
	default:
		/* seconds, explicit or implied */
		scale = 1;
		break;
	}

	return secs * scale;
}
181
/*
 * parse_ns_duration - parse a duration with a ns/us/ms/s unit into nanoseconds
 *
 * The text right after the leading base-10 number must begin with one of
 * the known units; whatever follows the unit is ignored, so "10ms:100ms"
 * parses as 10 ms.
 *
 * Returns the duration in nanoseconds, or -1 when no known unit follows the
 * number (a bare number without unit is rejected as well).
 */
long parse_ns_duration(char *val)
{
	/* "s" must stay last: the two-letter units are tried first */
	static const struct {
		const char *suffix;
		long scale;
	} units[] = {
		{ "ns", 1 },
		{ "us", 1000 },
		{ "ms", 1000 * 1000 },
		{ "s",  1000 * 1000 * 1000 },
	};
	char *rest;
	long amount;
	size_t i;

	amount = strtol(val, &rest, 10);

	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (strncmp(rest, units[i].suffix, strlen(units[i].suffix)) == 0)
			return amount * units[i].scale;
	}

	return -1;
}
210
211 /*
212 * This is a set of helper functions to use SCHED_DEADLINE.
213 */
214 #ifdef __x86_64__
215 # define __NR_sched_setattr 314
216 # define __NR_sched_getattr 315
217 #elif __i386__
218 # define __NR_sched_setattr 351
219 # define __NR_sched_getattr 352
220 #elif __arm__
221 # define __NR_sched_setattr 380
222 # define __NR_sched_getattr 381
223 #elif __aarch64__ || __riscv
224 # define __NR_sched_setattr 274
225 # define __NR_sched_getattr 275
226 #elif __powerpc__
227 # define __NR_sched_setattr 355
228 # define __NR_sched_getattr 356
229 #elif __s390x__
230 # define __NR_sched_setattr 345
231 # define __NR_sched_getattr 346
232 #endif
233
234 #define SCHED_DEADLINE 6
235
sched_setattr(pid_t pid,const struct sched_attr * attr,unsigned int flags)236 static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
237 unsigned int flags) {
238 return syscall(__NR_sched_setattr, pid, attr, flags);
239 }
240
/*
 * __set_sched_attr - apply the scheduling attributes in attr to pid
 *
 * Calls sched_setattr() with no flags and reports a failure, including the
 * errno text, through err_msg().
 *
 * Returns 0 on success, 1 on failure.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
255
256 /*
257 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
258 *
259 * Check if the procfs entry is a directory of a process, and then check if the
260 * process has a comm with the prefix set in char *comm_prefix. As the
261 * current users of this function only check for kernel threads, there is no
262 * need to check for the threads for the process.
263 *
264 * Return: True if the proc_entry contains a comm file with comm_prefix*.
265 * Otherwise returns false.
266 */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)267 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
268 {
269 char buffer[MAX_PATH];
270 int comm_fd, retval;
271 char *t_name;
272
273 if (proc_entry->d_type != DT_DIR)
274 return 0;
275
276 if (*proc_entry->d_name == '.')
277 return 0;
278
279 /* check if the string is a pid */
280 for (t_name = proc_entry->d_name; t_name; t_name++) {
281 if (!isdigit(*t_name))
282 break;
283 }
284
285 if (*t_name != '\0')
286 return 0;
287
288 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
289 comm_fd = open(buffer, O_RDONLY);
290 if (comm_fd < 0)
291 return 0;
292
293 memset(buffer, 0, MAX_PATH);
294 retval = read(comm_fd, buffer, MAX_PATH);
295
296 close(comm_fd);
297
298 if (retval <= 0)
299 return 0;
300
301 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
302 if (retval)
303 return 0;
304
305 /* comm already have \n */
306 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
307
308 return 1;
309 }
310
311 /*
312 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
313 *
314 * This function uses procfs to list the currently running threads and then set the
315 * sched_attr *attr to the threads that start with char *comm_prefix. It is
316 * mainly used to set the priority to the kernel threads created by the
317 * tracers.
318 */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)319 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
320 {
321 struct dirent *proc_entry;
322 DIR *procfs;
323 int retval;
324
325 if (strlen(comm_prefix) >= MAX_PATH) {
326 err_msg("Command prefix is too long: %d < strlen(%s)\n",
327 MAX_PATH, comm_prefix);
328 return 1;
329 }
330
331 procfs = opendir("/proc");
332 if (!procfs) {
333 err_msg("Could not open procfs\n");
334 return 1;
335 }
336
337 while ((proc_entry = readdir(procfs))) {
338
339 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
340 if (!retval)
341 continue;
342
343 /* procfs_is_workload_pid confirmed it is a pid */
344 retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
345 if (retval) {
346 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
347 goto out_err;
348 }
349
350 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
351 }
352 return 0;
353
354 out_err:
355 closedir(procfs);
356 return 1;
357 }
358
359 #define INVALID_VAL (~0L)
/*
 * get_long_ns_after_colon - parse the duration that follows the first ':'
 *
 * Returns the value produced by parse_ns_duration() for the text right
 * after the first colon in start, or -1 when start has no colon.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	return parse_ns_duration(colon + 1);
}
375
/*
 * get_long_after_colon - parse the integer that follows the first ':'
 *
 * Returns the value produced by get_llong_from_str() for the text right
 * after the first colon in start (converted to long), or -1 when start has
 * no colon.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	return get_llong_from_str(colon + 1);
}
391
392 /*
393 * parse priority in the format:
394 * SCHED_OTHER:
395 * o:<prio>
396 * O:<prio>
397 * SCHED_RR:
398 * r:<prio>
399 * R:<prio>
400 * SCHED_FIFO:
401 * f:<prio>
402 * F:<prio>
403 * SCHED_DEADLINE:
404 * d:runtime:period
405 * D:runtime:period
406 */
parse_prio(char * arg,struct sched_attr * sched_param)407 int parse_prio(char *arg, struct sched_attr *sched_param)
408 {
409 long prio;
410 long runtime;
411 long period;
412
413 memset(sched_param, 0, sizeof(*sched_param));
414 sched_param->size = sizeof(*sched_param);
415
416 switch (arg[0]) {
417 case 'd':
418 case 'D':
419 /* d:runtime:period */
420 if (strlen(arg) < 4)
421 return -1;
422
423 runtime = get_long_ns_after_colon(arg);
424 if (runtime == INVALID_VAL)
425 return -1;
426
427 period = get_long_ns_after_colon(&arg[2]);
428 if (period == INVALID_VAL)
429 return -1;
430
431 if (runtime > period)
432 return -1;
433
434 sched_param->sched_policy = SCHED_DEADLINE;
435 sched_param->sched_runtime = runtime;
436 sched_param->sched_deadline = period;
437 sched_param->sched_period = period;
438 break;
439 case 'f':
440 case 'F':
441 /* f:prio */
442 prio = get_long_after_colon(arg);
443 if (prio == INVALID_VAL)
444 return -1;
445
446 if (prio < sched_get_priority_min(SCHED_FIFO))
447 return -1;
448 if (prio > sched_get_priority_max(SCHED_FIFO))
449 return -1;
450
451 sched_param->sched_policy = SCHED_FIFO;
452 sched_param->sched_priority = prio;
453 break;
454 case 'r':
455 case 'R':
456 /* r:prio */
457 prio = get_long_after_colon(arg);
458 if (prio == INVALID_VAL)
459 return -1;
460
461 if (prio < sched_get_priority_min(SCHED_RR))
462 return -1;
463 if (prio > sched_get_priority_max(SCHED_RR))
464 return -1;
465
466 sched_param->sched_policy = SCHED_RR;
467 sched_param->sched_priority = prio;
468 break;
469 case 'o':
470 case 'O':
471 /* o:prio */
472 prio = get_long_after_colon(arg);
473 if (prio == INVALID_VAL)
474 return -1;
475
476 if (prio < MIN_NICE)
477 return -1;
478 if (prio > MAX_NICE)
479 return -1;
480
481 sched_param->sched_policy = SCHED_OTHER;
482 sched_param->sched_nice = prio;
483 break;
484 default:
485 return -1;
486 }
487 return 0;
488 }
489
/*
 * set_cpu_dma_latency - write a latency value to /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the caller has to keep the returned descriptor open.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 on error.
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* the raw 4-byte value is written */
	if (write(fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
521
522 #define _STR(x) #x
523 #define STR(x) _STR(x)
524
525 /*
526 * find_mount - find a the mount point of a given fs
527 *
528 * Returns 0 if mount is not found, otherwise return 1 and fill mp
529 * with the mount point.
530 */
find_mount(const char * fs,char * mp,int sizeof_mp)531 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
532 {
533 char mount_point[MAX_PATH+1];
534 char type[100];
535 int found = 0;
536 FILE *fp;
537
538 fp = fopen("/proc/mounts", "r");
539 if (!fp)
540 return 0;
541
542 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
543 if (strcmp(type, fs) == 0) {
544 found = 1;
545 break;
546 }
547 }
548 fclose(fp);
549
550 if (!found)
551 return 0;
552
553 memset(mp, 0, sizeof_mp);
554 strncpy(mp, mount_point, sizeof_mp - 1);
555
556 debug_msg("Fs %s found at %s\n", fs, mp);
557 return 1;
558 }
559
560 /*
561 * get_self_cgroup - get the current thread cgroup path
562 *
563 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
564 *
565 * 0::/user.slice/user-0.slice/session-3.scope'\n'
566 *
567 * This function is interested in the content after the second : and before the '\n'.
568 *
569 * Returns 1 if a string was found, 0 otherwise.
570 */
get_self_cgroup(char * self_cg,int sizeof_self_cg)571 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
572 {
573 char path[MAX_PATH], *start;
574 int fd, retval;
575
576 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
577
578 fd = open(path, O_RDONLY);
579 if (fd < 0)
580 return 0;
581
582 retval = read(fd, path, MAX_PATH);
583
584 close(fd);
585
586 if (retval <= 0)
587 return 0;
588
589 start = path;
590
591 start = strstr(start, ":");
592 if (!start)
593 return 0;
594
595 /* skip ":" */
596 start++;
597
598 start = strstr(start, ":");
599 if (!start)
600 return 0;
601
602 /* skip ":" */
603 start++;
604
605 if (strlen(start) >= sizeof_self_cg)
606 return 0;
607
608 snprintf(self_cg, sizeof_self_cg, "%s", start);
609
610 /* Swap '\n' with '\0' */
611 start = strstr(self_cg, "\n");
612
613 /* there must be '\n' */
614 if (!start)
615 return 0;
616
617 /* ok, it found a string after the second : and before the \n */
618 *start = '\0';
619
620 return 1;
621 }
622
623 /*
624 * set_comm_cgroup - Set cgroup to pid_t pid
625 *
626 * If cgroup argument is not NULL, the threads will move to the given cgroup.
627 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
628 *
629 * Supports cgroup v2.
630 *
631 * Returns 1 on success, 0 otherwise.
632 */
set_pid_cgroup(pid_t pid,const char * cgroup)633 int set_pid_cgroup(pid_t pid, const char *cgroup)
634 {
635 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
636 char cgroup_procs[MAX_PATH];
637 char pid_str[24];
638 int retval;
639 int cg_fd;
640
641 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
642 if (!retval) {
643 err_msg("Did not find cgroupv2 mount point\n");
644 return 0;
645 }
646
647 if (!cgroup) {
648 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
649 sizeof(cgroup_path) - strlen(cgroup_path));
650 if (!retval) {
651 err_msg("Did not find self cgroup\n");
652 return 0;
653 }
654 } else {
655 snprintf(&cgroup_path[strlen(cgroup_path)],
656 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
657 }
658
659 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
660
661 debug_msg("Using cgroup path at: %s\n", cgroup_procs);
662
663 cg_fd = open(cgroup_procs, O_RDWR);
664 if (cg_fd < 0)
665 return 0;
666
667 snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
668
669 retval = write(cg_fd, pid_str, strlen(pid_str));
670 if (retval < 0)
671 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
672 pid_str, strerror(errno));
673 else
674 debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
675
676 close(cg_fd);
677
678 return (retval >= 0);
679 }
680
681 /**
682 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
683 *
684 * If cgroup argument is not NULL, the threads will move to the given cgroup.
685 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
686 *
687 * Supports cgroup v2.
688 *
689 * Returns 1 on success, 0 otherwise.
690 */
set_comm_cgroup(const char * comm_prefix,const char * cgroup)691 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
692 {
693 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
694 char cgroup_procs[MAX_PATH];
695 struct dirent *proc_entry;
696 DIR *procfs;
697 int retval;
698 int cg_fd;
699
700 if (strlen(comm_prefix) >= MAX_PATH) {
701 err_msg("Command prefix is too long: %d < strlen(%s)\n",
702 MAX_PATH, comm_prefix);
703 return 0;
704 }
705
706 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
707 if (!retval) {
708 err_msg("Did not find cgroupv2 mount point\n");
709 return 0;
710 }
711
712 if (!cgroup) {
713 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
714 sizeof(cgroup_path) - strlen(cgroup_path));
715 if (!retval) {
716 err_msg("Did not find self cgroup\n");
717 return 0;
718 }
719 } else {
720 snprintf(&cgroup_path[strlen(cgroup_path)],
721 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
722 }
723
724 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
725
726 debug_msg("Using cgroup path at: %s\n", cgroup_procs);
727
728 cg_fd = open(cgroup_procs, O_RDWR);
729 if (cg_fd < 0)
730 return 0;
731
732 procfs = opendir("/proc");
733 if (!procfs) {
734 err_msg("Could not open procfs\n");
735 goto out_cg;
736 }
737
738 while ((proc_entry = readdir(procfs))) {
739
740 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
741 if (!retval)
742 continue;
743
744 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
745 if (retval < 0) {
746 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
747 proc_entry->d_name, strerror(errno));
748 goto out_procfs;
749 }
750
751 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
752 }
753
754 closedir(procfs);
755 close(cg_fd);
756 return 1;
757
758 out_procfs:
759 closedir(procfs);
760 out_cg:
761 close(cg_fd);
762 return 0;
763 }
764
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Compares the affinity of the calling process with monitored_cpus. If they
 * overlap, the process affinity is narrowed to the CPUs it may already run
 * on that are not monitored. Nothing is changed when there is no overlap.
 *
 * Returns 1 on success (including when nothing had to be done), 0 when the
 * affinity could not be read or set, or when no CPU would be left.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;
	pid_t self = getpid();

	/* CPUs on which rtla is currently allowed to run */
	if (sched_getaffinity(self, sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* an empty intersection means the current setup is already fine */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/*
	 * XOR drops the shared CPUs but keeps monitored-only ones; the AND
	 * then limits the result to CPUs rtla was allowed on.
	 */
	CPU_XOR(&candidates, &allowed, monitored_cpus);
	CPU_AND(&candidates, &candidates, &allowed);

	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(self, sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
814